library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Load the combined UFO-sighting / weather table. The CSV's first column has an
# empty header, so readr auto-names it `...1`.
ufo <- read_csv(file = "UFO_and_Weather.csv")
## New names:
## Rows: 22482 Columns: 18
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): city, state, shape, text dbl (12): ...1, city_latitude, city_longitude,
## year, month, day, hour, temp... lgl (1): snow dttm (1): date_time
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Show every row that has an exact twin elsewhere in the data (the first
# occurrence as well as the repeats).
# The auto-named `...1` column is left out of the comparison: it looks like a
# per-row index written by the exporting tool (TODO confirm), and with it
# included no two rows could ever compare equal, which makes the check vacuous.
# If `...1` is absent, all columns are compared.
# NOTE(review): the printed result recorded below was produced with `...1`
# included; re-run to refresh it.
dup_key <- ufo[setdiff(names(ufo), "...1")]
ufo[duplicated(dup_key) | duplicated(dup_key, fromLast = TRUE), ]
## # A tibble: 0 × 18
## # … with 18 variables: ...1 <dbl>, city <chr>, state <chr>, date_time <dttm>,
## # shape <chr>, text <chr>, city_latitude <dbl>, city_longitude <dbl>,
## # year <dbl>, month <dbl>, day <dbl>, hour <dbl>, temperature <dbl>,
## # relative_humidity <dbl>, precipitation <dbl>, snow <lgl>,
## # wind_direction <dbl>, wind_speed <dbl>
# Number of sighting reports per `state` code, one row per distinct code.
# The codes are not limited to US states (e.g. "BC" occurs in the data).
ufo1 <- count(ufo, state, name = "count")
# Choropleth of raw sighting counts per state.
# No border-style list is built here: nothing in this figure consumed one, so
# the trace simply uses plotly's default state outlines.

# Geo layout: restrict the map to the USA and draw lakes in white.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)

# `locations` are two-letter state codes matched via locationmode "USA-states";
# both the fill colour and the hover text come from `count`.
fig <- plot_geo(ufo1, locationmode = "USA-states") %>%
  add_trace(
    z = ~count, text = ~count, locations = ~state,
    color = ~count, colors = "Purples"
  ) %>%
  colorbar(title = "counts") %>%
  layout(
    title = "UFO sightings in the US from 2015-2019",
    geo = g
  )
fig
# Population data source: https://www.kaggle.com/datasets/peretzcohen/2019-census-us-population-data-by-state
# Load the 2019 population estimates: one row per STATE (full name) with
# POPESTIMATE2019 plus a lat/long pair.
us_pop <- read_csv(file = "2019_Census_US_Population_Data_By_State_Lat_Long.csv")
## Rows: 51 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): STATE
## dbl (3): POPESTIMATE2019, lat, long
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Lookup table from full state name to two-letter postal code, built from the
# base R `state.abb` / `state.name` vectors.
state <- data.frame(state.abb, state.name)

# Attach the postal code to every population row. The base R vectors cover the
# 50 states only, so the District of Columbia finds no match in the join and
# its code is filled in by hand.
us_pop1 <- us_pop %>%
  left_join(state, by = c("STATE" = "state.name")) %>%
  mutate(
    state.abb = replace(state.abb, STATE == "District of Columbia", "DC")
  )
us_pop1
## # A tibble: 51 × 5
## STATE POPESTIMATE2019 lat long state.abb
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 Alabama 4903185 32.4 -86.3 AL
## 2 Alaska 731545 58.3 -134. AK
## 3 Arizona 7278717 33.4 -112. AZ
## 4 Arkansas 3017804 34.7 -92.3 AR
## 5 California 39512223 38.6 -121. CA
## 6 Colorado 5758736 39.7 -105. CO
## 7 Connecticut 3565287 41.8 -72.7 CT
## 8 Delaware 973764 39.2 -75.5 DE
## 9 District of Columbia 705749 38.9 -77.0 DC
## 10 Florida 21477737 30.4 -84.3 FL
## # … with 41 more rows
# Sightings per resident: match each state's count to its 2019 population
# estimate by postal code and keep only the code and the ratio. The inner join
# drops any `state` code that has no row in `us_pop1`.
ufo2 <- ufo1 %>%
  inner_join(us_pop1, by = c(state = "state.abb")) %>%
  transmute(state, proportion = count / POPESTIMATE2019)
# Choropleth of sightings relative to state population.

# White, 2px line style. It is not passed to the trace below.
l <- list(color = toRGB("white"), width = 2)

# Geo layout: restrict the map to the USA and draw lakes in white.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)

# `locations` are two-letter state codes matched via locationmode "USA-states";
# the fill colour encodes `proportion`.
fig <- plot_geo(ufo2, locationmode = "USA-states") %>%
  add_trace(
    z = ~proportion, locations = ~state,
    color = ~proportion, colors = "Reds"
  ) %>%
  colorbar(title = "proportions") %>%
  layout(
    title = "UFO sightings per population in the US from 2015-2019",
    geo = g
  )
fig
# Sightings per (city, state) pair. `count()` returns an ungrouped tibble, so
# the result carries no leftover `city` grouping and no `.groups` message is
# emitted.
count(ufo, city, state)
## # A tibble: 6,837 × 3
## city state n
## <chr> <chr> <int>
## 1 Abbotsford BC 1
## 2 Abbott TX 1
## 3 Aberdeen MD 2
## 4 Aberdeen NC 1
## 5 Aberdeen SD 2
## 6 Aberdeen WA 4
## 7 Abilene KS 2
## 8 Abilene TX 4
## 9 Abingdon MD 2
## 10 Abingdon VA 3
## # … with 6,827 more rows